home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
FishMarket 1.0
/
FishMarket v1.0.iso
/
fishies
/
176-200
/
disk_191
/
blk
/
lex.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-05-06
|
17KB
|
897 lines
/*
 * Lexical analysis functions.  Takes an input stream and returns "tokens".
 * Implements a small pre-processor supporting #include and #define (plus
 * #ifdef/#ifndef/#else/#endif).  Tokens are returned as character strings.
 */
#include <stdio.h>
#include <ctype.h>
#include <functions.h>
#include <exec/types.h>
#include "listmac.h"
#include "std.h"
#include "lex.h"
/*
 * List element carrying one string pointer.  Used both for the hash
 * table entries created by FindString and for the formal parameter
 * names collected by Define.
 */
struct StringElt {
struct Nod node;
/* the string this element refers to */
char *buf;
};
/*
 * One lexical token.  RawToken fills in the static "rawtok"; copies
 * made with CopyToken are linked into macro token lists.
 */
struct Token {
struct Nod node;
/* string from FindString; set for RT_ID, RT_NUM and RT_STR tokens */
char *buf;
/*
 * type: one of the RT_* codes.
 * can_be_macro: non-zero if an RT_ID token may be looked up as a macro
 * name by NextToken (cleared on tokens copied into macro arguments).
 */
short type, can_be_macro;
/* the character itself for RT_CHR and RT_CHRC tokens */
char chr;
};
/*
 * A #define'd macro, or an actual argument of a macro call (which
 * InstantiateMacro stores as a parameterless macro named after the
 * formal parameter).
 */
struct Macro {
struct Nod node;
/* list of StringElt, one per formal parameter */
struct Lst arguments;
/* list of Token making up the replacement text */
struct Lst tokens;
/* macro name; compared by pointer in FindMacro */
char *name;
/*
 * active: non-zero while the macro is being expanded.
 * nargs: number of formal parameters.
 */
short active, nargs;
};
/*
 * Entry on the "activemacs" list: a macro that is currently being
 * expanded, together with the read position inside its token list.
 */
struct ActiveMacro {
struct Nod node;
/* the macro being expanded */
struct Macro *mac;
/* next token of mac->tokens that NextToken will hand out */
struct Token *curtok;
};
/*
 * Entry on the "files" list: one open source or include file.
 */
struct LexFile {
struct Nod node;
/* stream the characters are read from */
FILE *fp;
/*
 * Set to 1 when the file is opened and after each newline token;
 * cleared by white space and by any other token.  RawToken treats a
 * '#' as a directive only while this flag is set.
 */
short newln;
/* one character of lookahead: the value the next NextC call returns */
short nextchar;
};
struct IfBlock {
struct Nod *node;
short skip_else;
};
/* forward declarations of functions returning pointers */
struct Token *CopyToken ();
struct Macro *FindMacro ();
char *FindString ();
/* size of the scratch buffer RawToken collects token text in */
#define BUFLEN 100
static char buf[BUFLEN];
/* next free position in buf */
static short bufpos;
/*
 * rawtok: the token most recently produced by RawToken.
 * lasttok: the token most recently returned by NextToken.
 * retok: token pushed back by Backspace, or NULL.
 */
static struct Token
rawtok, *lasttok, *retok;
/* file currently being read (head of "files"), or 0 when none is open */
static struct LexFile *toplf;
/*
 * macros: defined macros plus the arguments of macros being expanded.
 * activemacs: macros currently being expanded.
 * files: open files, innermost include first.
 * ifblocks: open #ifdef/#ifndef blocks.
 */
static struct Lst macros, activemacs, files, ifblocks;
static short files_open = 0, init_done = 0;
/*
 * Directive names as returned by FindString; directives are recognised
 * by comparing these pointers with rawtok.buf.
 */
static char *includestr, *definestr, *ifdefstr,
*ifndefstr, *elsestr, *endifstr;
/*
 * libbase is prefixed to an include name to form libnam; basnam holds
 * the name as written in the #include directive.
 */
static char *libbase = "hd:include/", libnam[80], basnam[80];
/* number of buckets in the string hash table */
#define NHASH 53
struct Lst hashTab[NHASH];
/*
* Open a file for lexing - could be an include.
* Set newline flag to true.
*/
short OpenLexFile (name)
char *name;
{
struct LexFile *lf;
struct Macro *mac;
/*
* on first call, init environment vars
*/
if (!init_done) {
NewList (&files);
NewList (¯os);
NewList (&ifblocks);
NewList (&activemacs);
InitHash ();
includestr = FindString ("include");
definestr = FindString ("define");
ifdefstr = FindString ("ifdef");
ifndefstr = FindString ("ifndef");
elsestr = FindString ("else");
endifstr = FindString ("endif");
init_done = 1;
}
/*
* if there are no files currently open, prepare for a new one
*/
if (!files_open) {
while (mac = (struct Macro *) RemHead (¯os))
FreeMacro (mac);
ASSERT (!TEST (HEAD (&ifblocks)));
ASSERT (!TEST (HEAD (&activemacs)));
retok = NULL;
}
lf = NEW (struct LexFile);
if (!lf) return 0;
/*
* printf ("opening \"%s\"\n", name);
*/
lf->fp = fopen (name, "r");
if (!lf->fp) {
FREI (lf);
return 0;
}
lf->newln = 1;
lf->nextchar = getc (lf->fp);
AddHead (&files, lf);
toplf = lf;
if (feof (lf->fp)) {
CloseLexFile ();
return 0;
}
files_open = 1;
return 1;
}
/*
* Close top file returning 1 for the last file now closed.
*/
short CloseLexFile ()
{
if (!files_open) return 1;
fclose (toplf->fp);
Remove (toplf);
FREI (toplf);
toplf = HEAD (&files);
if (TEST (toplf)) return 0;
files_open = 0;
toplf = 0;
return 1;
}
/*
* free stuff up
*/
LexCleanup ()
{
struct Macro *mac, *nmac;
while (files_open) CloseLexFile ();
FreeHash ();
for (mac = HEAD (¯os); nmac = NEXT (mac); mac = nmac)
FreeMacro (mac);
ASSERT (!TEST (HEAD (&activemacs)));
/*
* initialization can be done again
*/
init_done = 0;
}
/*
 * Release a macro: every token of its replacement list, every element
 * of its parameter list, and finally the macro itself.  The macro must
 * already be unlinked from any list it was on.
 */
FreeMacro (mac)
struct Macro *mac;
{
	struct Token *t;
	struct StringElt *param;

	for (t = (struct Token *) RemHead (&mac->tokens); t;
	     t = (struct Token *) RemHead (&mac->tokens)) {
		FREI (t);
	}
	for (param = (struct StringElt *) RemHead (&mac->arguments); param;
	     param = (struct StringElt *) RemHead (&mac->arguments)) {
		FREI (param);
	}
	FREI (mac);
}
/*
 * Read the next raw character from fp, translating backslash escapes.
 * Escapes are handled everywhere, not only inside strings; this is
 * what makes a backslash before a newline work.
 *
 *   \ddd      numeric value of the digit run; base 8 if the first
 *             digit is 0, base 10 otherwise
 *   \"        yields '.'
 *   \n \b \t  the usual control characters
 *   \newline  yields the character following the newline
 *   \other    yields the other character itself
 *
 * Returns the character, or EOF at end of file.
 */
short NextC1 (fp)
FILE *fp;
{
	short ch, value, base;

	ch = getc (fp);
	if (ch != '\\')
		return ch;

	ch = getc (fp);
	if (isdigit (ch)) {
		base = (ch == '0') ? 8 : 10;
		value = ch - '0';
		for (ch = getc (fp); isdigit (ch); ch = getc (fp))
			value = value * base + (ch - '0');
		/* the character that ended the number is not ours */
		ungetc (ch, fp);
		return value;
	}
	if (ch == '"')
		return '.';
	if (ch == 'n')
		return '\n';
	if (ch == 'b')
		return '\b';
	if (ch == 't')
		return '\t';
	if (ch == '\n')
		return getc (fp);
	return ch;
}
/*
 * Hand out the lookahead character of the top lex file and refill the
 * lookahead from the file.  Returns EOF when no file is open.
 */
short NextC ()
{
	short cur;

	if (toplf) {
		cur = toplf->nextchar;
		toplf->nextchar = NextC1 (toplf->fp);
		return cur;
	}
	return EOF;
}
/*
 * Move the top lex file forward until its lookahead character is a
 * newline, which is left unread, or until the end of the file.
 * Does nothing when no file is open.  No meaningful return value.
 */
AdvanceEOL ()
{
	if (!toplf) return;
	/*
	 * Stop on the EOF lookahead rather than on feof(): after a read
	 * error getc keeps returning EOF while feof() stays false, which
	 * would make this loop spin forever.
	 */
	while (toplf->nextchar != '\n' && toplf->nextchar != EOF)
		NextC ();
}
/*
 * Append one character to the token text buffer, always leaving room
 * for the terminating 0.  Returns 1 if stored, 0 if the buffer is full.
 */
static short RawBufPut (c)
short c;
{
	if (bufpos >= BUFLEN - 1) return 0;
	buf[bufpos++] = c;
	return 1;
}

/*
 * Read one raw token from the top lex file into "rawtok", skipping
 * white space and comments and maintaining the file's newline flag.
 *
 * rawtok.type is set to
 *   RT_EOF    end of file (also for a comment that is never closed)
 *   RT_ESC    '#' as first non-blank character of a line
 *   RT_CHRC   character constant; rawtok.chr holds its first character
 *   RT_ID     identifier; rawtok.buf is its string from FindString
 *   RT_NUM    run of digits; rawtok.buf likewise
 *   RT_STR    string constant without the quotes; rawtok.buf likewise
 *   RT_NEWLN  newline
 *   RT_CHR    any other single character, in rawtok.chr
 *
 * Token text longer than the buffer is truncated and reported.
 * No meaningful return value.
 */
RawToken ()
{
	short c;
	short nochar = 1, extra, nl, truncated = 0;

	bufpos = 0;
	rawtok.can_be_macro = 1;
	/*
	 * get a non-blank char
	 */
	while (nochar) {
		/*
		 * at end of file just report that fact right away
		 */
		c = NextC ();
		if (c == EOF) {
			rawtok.type = RT_EOF;
			return;
		}
		if (isspace (c) && c != '\n') {
			/* white space other than newline ends "start of line" */
			toplf->newln = 0;
		} else if (c == '/' && toplf->nextchar == '*') {
			/*
			 * Comment (not nested).  The opening star is consumed
			 * first so that it cannot also serve as the star of
			 * the closing pair.
			 */
			NextC ();
			for (;;) {
				c = NextC ();
				if (c == EOF) {
					/* never closed: do not loop forever */
					printf ("unterminated comment\n");
					rawtok.type = RT_EOF;
					return;
				}
				if (c == '*' && toplf->nextchar == '/') {
					NextC ();
					break;
				}
			}
		} else nochar = 0;
	}
	nl = toplf->newln;
	toplf->newln = 0;
	bufpos = 0;
	/*
	 * classify the token by its first character
	 */
	if (nl && c == '#')
		rawtok.type = RT_ESC;
	else if (c == '\'') {
		rawtok.chr = NextC ();
		c = NextC ();
		/*
		 * Skip past weird character constants, like 'FOOL',
		 * making no attempt to deal with them correctly.
		 */
		extra = 0;
		while (c != '\'' && extra < 6) {
			c = NextC ();
			extra++;
		}
		if (extra >= 6) printf ("error in character constant\n");
		rawtok.type = RT_CHRC;
	} else if (isalpha (c) || c == '_') {
		RawBufPut (c);
		while (isalnum (toplf->nextchar) || toplf->nextchar == '_')
			if (!RawBufPut (NextC ())) truncated = 1;
		buf[bufpos] = 0;
		if (truncated) printf ("identifier too long\n");
		rawtok.type = RT_ID;
		rawtok.buf = FindString (buf);
	} else if (c == '\n') {
		toplf->newln = 1;
		rawtok.type = RT_NEWLN;
	} else if (isdigit (c)) {
		RawBufPut (c);
		while (isdigit (toplf->nextchar))
			if (!RawBufPut (NextC ())) truncated = 1;
		buf[bufpos] = 0;
		if (truncated) printf ("number too long\n");
		rawtok.buf = FindString (buf);
		rawtok.type = RT_NUM;
	} else if (c == '"') {
		while ((c = NextC ()) != '"') {
			if (c == EOF) {
				/* never closed: do not loop forever */
				printf ("unterminated string\n");
				break;
			}
			if (!RawBufPut (c)) truncated = 1;
		}
		if (truncated) printf ("string overflow\n");
		buf[bufpos] = 0;
		rawtok.buf = FindString (buf);
		rawtok.type = RT_STR;
	} else {
		rawtok.chr = c;
		rawtok.type = RT_CHR;
	}
}
/*
* Main function -- returns string pointer for next token makes include files
* and macros transparent reads from top open file and returns pointer to
* string of chars could be a symbol, could be a number, could be a character
*/
short NextToken (tbuf)
char **tbuf;
{
struct Token *tok;
struct Macro *mac;
struct ActiveMacro *amac;
short i;
char c;
while () {
/*
* get raw token from file or active macro
*/
amac = HEAD (&activemacs);
if (retok) {
tok = retok;
retok = NULL;
} else if (TEST (amac)) {
tok = amac->curtok;
if (!TEST (tok)) {
amac->mac->active = 0;
for (i = 0; i < amac->mac->nargs; i++)
FreeMacro (RemHead (¯os));
Remove (amac);
FREI (amac);
continue;
}
amac->curtok = NEXT (amac->curtok);
} else {
RawToken ();
tok = &rawtok;
}
switch (tok->type) {
case RT_EOF:
if (CloseLexFile ()) {
lasttok = tok;
return RT_EOF;
}
break;
case RT_ESC:
RawToken ();
if (rawtok.type != RT_ID) {
printf ("real problem with lexer directive\n");
AdvanceEOL ();
break;
}
if (rawtok.buf == includestr) Include ();
else if (rawtok.buf == definestr) Define ();
else if (rawtok.buf == ifdefstr) IfDef (1);
else if (rawtok.buf == ifndefstr) IfDef (0);
else if (rawtok.buf == elsestr) Else ();
else if (rawtok.buf == endifstr) EndIf ();
else {
printf ("unknown directive \"%s\"\n", rawtok.buf);
AdvanceEOL ();
}
break;
case RT_ID:
/*
* test for known macros (if this token can be one)
*/
if (tok->can_be_macro) {
mac = FindMacro (tok->buf);
if (mac) {
InstantiateMacro (mac);
break;
}
}
case RT_STR:
case RT_NUM:
*tbuf = tok->buf;
lasttok = tok;
return tok->type;
case RT_CHR:
case RT_CHRC:
*tbuf = &tok->chr;
lasttok = tok;
return tok->type;
}
}
}
/*
* The given macro has occured in the text, cause it to come into existence
* with argument substitution.
*/
InstantiateMacro (mac)
struct Macro *mac;
{
struct ActiveMacro *amac;
struct StringElt *arg;
struct Macro *smac;
struct Token *tok;
char *buf;
short vtok, level, endarglist;
if (mac->active) {
printf ("recursive macro ignored\n");
return;
}
/*
* read arguments, if any, and make them macros in their own right
* arguments are read recursively using NextToken (right? I'm not
* sure...)
*/
arg = HEAD (&mac->arguments);
if (TEST (arg)) {
/*
* get first open paren
*/
vtok = NextToken (&buf);
if (vtok != RT_CHR || *buf != '(')
goto argfail;
/*
* loop through args separated with commas
*/
endarglist = 0;
while (TEST (arg)) {
smac = NEW (struct Macro);
if (!smac) goto argfail;
smac->name = arg->buf;
smac->active = 0;
smac->nargs = 0;
NewList (&smac->arguments);
NewList (&smac->tokens);
/*
* scan out tokens allowing for nested commas
*/
level = 0;
while () {
vtok = NextToken (&buf);
if (vtok == RT_CHR) {
if (*buf == '(' || *buf == '{')
level++;
if (*buf == '}')
level--;
if (*buf == ')') {
if (level == 0) {
endarglist = 1;
break;
}
level--;
}
if (*buf == ',' && level == 0)
break;
}
tok = CopyToken (lasttok);
if (!tok) goto argfail;
tok->can_be_macro = 0;
AddTail (&smac->tokens, tok);
}
AddHead (¯os, smac);
if (endarglist) break;
arg = NEXT (arg);
}
if (vtok != RT_CHR || *buf != ')') goto argfail;
}
/*
* Macro does not become active in the global list until all
* arguments have been parsed and interpreted.
*/
amac = NEW (struct ActiveMacro);
if (!amac) return; /* what the fuck do I do here? */
amac->mac = mac;
amac->curtok = HEAD (&mac->tokens);
mac->active = 1;
AddHead (&activemacs, amac);
return;
argfail:
printf ("bad problem with arguments\n");
}
/*
* locate a specified macro in the available macros list
*/
struct Macro * FindMacro (buf)
char *buf;
{
struct Macro *mac;
for (mac = HEAD (¯os); TEST (mac); mac = NEXT (mac))
if (mac->name == buf)
return mac;
return NULL;
}
/*
* Do an "include" directive
*/
Include ()
{
short pos, ok, global;
char c;
/*
* get filename
*/
RawToken ();
ok = 0;
global = 0;
if (rawtok.type == RT_STR) {
ok = 1;
strcpy (basnam, rawtok.buf);
} else if (rawtok.type == RT_CHR && rawtok.chr == '<') {
ok = 1;
pos = 0;
while () {
c = NextC ();
if (c == '>') break;
basnam[pos++] = c;
}
basnam[pos] = 0;
global = 1;
}
AdvanceEOL ();
if (!ok) {
printf ("no file to include\n");
return;
}
if (!global) if (OpenLexFile (basnam)) return;
strcpy (libnam, libbase);
strcat (libnam, basnam);
if (OpenLexFile (libnam)) return;
printf ("error open include file <%s>\n", libnam);
}
/*
* Do the "define" directive.
*/
Define ()
{
struct Macro *mac = NULL;
struct Token *tok;
struct StringElt *se;
/*
* get identifier to define
*/
RawToken ();
if (rawtok.type != RT_ID) {
printf ("error in define - no identifier\n");
goto escape;
}
mac = NEW (struct Macro);
if (!mac) goto escape;
mac->name = rawtok.buf;
mac->active = 0;
mac->nargs = 0;
NewList (&mac->arguments);
NewList (&mac->tokens);
/*
* Look for parenthized argument list.
*/
if (toplf->nextchar == '(') {
RawToken ();
while () {
RawToken ();
/*
* deal with special case of "#define MACRO()"
*/
if (!mac->nargs && rawtok.type == RT_CHR
&& rawtok.chr == ')')
break;
if (rawtok.type != RT_ID) {
printf ("macro argument not an identifier\n");
goto escape;
}
se = NEW (struct StringElt);
if (!se) goto escape;
se->buf = rawtok.buf;
AddTail (&mac->arguments, se);
mac->nargs++;
RawToken ();
if (rawtok.type != RT_CHR) {
printf ("macro arg list delimiter not a character\n");
goto escape;
}
if (rawtok.chr == ')') break;
if (rawtok.chr != ',') {
printf ("macro arg list delimiter not ',' or ')'\n");
goto escape;
}
}
}
/*
* get the sequence of tokens which make up this definition
*/
while () {
RawToken ();
if (rawtok.type == RT_NEWLN || rawtok.type == RT_EOF) break;
tok = CopyToken (&rawtok);
if (!tok) goto escape;
tok->can_be_macro = 1;
AddTail (&mac->tokens, tok);
}
AddHead (¯os, mac);
return;
escape:
if (mac) FreeMacro (mac);
AdvanceEOL ();
}
/*
 * Skip over a defined-out section, keeping track of #ifdef/#ifndef
 * blocks nested inside it so that only the #else or #endif belonging
 * to the section being skipped ends it.  The terminating directive
 * word is consumed; the rest of its line is not.
 *
 * Returns 1 if the section ends with #else, 0 if it ends with #endif,
 * -1 if the end of the file is reached first.
 */
short SkipRemovedSec ()
{
	short level = 0;

	for (;;) {
		RawToken ();
		if (rawtok.type == RT_EOF) return -1;
		if (rawtok.type == RT_ESC) {
			RawToken ();
			if (rawtok.type == RT_ID) {
				if (rawtok.buf == ifdefstr
				    || rawtok.buf == ifndefstr) {
					level++;
				}
				if (rawtok.buf == elsestr) {
					if (!level) return 1;
				}
				if (rawtok.buf == endifstr) {
					/* a nested #endif only closes its own block */
					if (!level--) return 0;
				}
			}
		}
	}
}
/*
* Does the "ifdef"/"ifndef" - "else" - "endif" directive. Type is 1
* for ifdef and 0 for ifndef.
*/
IfDef (type)
short type;
{
struct Macro *mac;
struct IfBlock *ib;
short els;
RawToken ();
mac = FindMacro (rawtok.buf);
/*
* If condition is true, add a node to say "skip the next #else
* section" if false, skip this section. If section ends with an
* else, add a junk node to pop off for consistency at next #endif.
*/
if ((mac && type) || (!mac && !type)) {
ib = NEW (struct IfBlock);
if (!ib) return;
ib->skip_else = 1;
AddHead (&ifblocks, ib);
} else {
els = SkipRemovedSec ();
if (els == 1) {
ib = NEW (struct IfBlock);
if (!ib) return;
ib->skip_else = 0;
AddHead (&ifblocks, ib);
}
}
}
/*
 * Does the "else" directive met while lexing normally, i.e. after a
 * section that was not skipped.  The innermost open block is removed;
 * if it asks for its #else section to be skipped, that section is
 * skipped up to its end.  A block that does not ask for this, or a
 * skipped section that itself ends with another #else, is reported.
 * No meaningful return value.
 */
Else ()
{
	struct IfBlock *ib;
	short els;

	ib = HEAD (&ifblocks);
	ASSERT (TEST (ib));
	Remove (ib);
	if (ib->skip_else) {
		els = SkipRemovedSec ();
		if (els == 1)
			printf ("strange \"else\" block found\n");
	} else
		printf ("strange \"else\" block found\n");
	FREI (ib);
}
/*
 * Does the "endif" directive: removes and frees the innermost open
 * #ifdef/#ifndef block.  No meaningful return value.
 */
EndIf ()
{
	struct IfBlock *ib = HEAD (&ifblocks);

	ASSERT (TEST (ib));
	Remove (ib);
	FREI (ib);
}
/*
 * Backs up one token: the token most recently returned by NextToken
 * (lasttok) is handed out again by the next NextToken call instead of
 * a new one being read.  Only one token of push-back is kept; calling
 * this twice in a row pushes the same token back again.
 * NOTE(review): lasttok may point into a macro's token list; confirm
 * that the macro cannot be freed before the token is re-read.
 */
Backspace ()
{
retok = lasttok;
}
/*
 * Make a newly allocated copy of a token, list node included.
 * Returns the copy, or NULL if no memory is available.
 */
struct Token * CopyToken (tok)
struct Token *tok;
{
	struct Token *dup = NEW (struct Token);

	if (!dup)
		return NULL;
	*dup = *tok;
	return dup;
}
/* HASH TABLE STUFF */
InitHash ()
{
short i;
for (i = 0; i < NHASH; i++)
NewList (&hashTab[i]);
}
/*
 * Hash a 0-terminated string to a bucket number: each character is
 * added to three times the running value, and the result is reduced
 * modulo NHASH.  Returns a value from 0 to NHASH - 1.
 */
short HashVal (buf)
char *buf;
{
	unsigned long acc = 0;
	char *p;

	for (p = buf; *p != 0; p++)
		acc = acc * 3 + *p;
	return acc % NHASH;
}
/*
* Finds (or creates) a stored string matching the given one return pointer
* which acts as unique ident for this string.
*/
char * FindString (buf)
char *buf;
{
short hash;
struct Lst *hlist;
struct StringElt *se;
char *sto;
hash = HashVal (buf);
hlist = &hashTab[hash];
for (se = HEAD (hlist); TEST (se); se = NEXT (se)) {
if (!strcmp (se->buf, buf))
return se->buf;
}
se = NEW (struct StringElt);
if (!se) return NULL;
sto = NEW_N (char, strlen (buf) + 1);
if (!sto) {
FREI (se);
return NULL;
}
AddTail (hlist, se);
strcpy (sto, buf);
se->buf = sto;
return sto;
}
/*
 * Free every string stored in the hash table together with its list
 * element.  Each element is unlinked before it is freed, so all buckets
 * are valid empty lists afterwards and calling this again is harmless.
 * Pointers obtained from FindString are invalid after this call.
 * No meaningful return value.
 */
FreeHash ()
{
	short i;
	struct StringElt *se;

	for (i = 0; i < NHASH; i++) {
		while ((se = (struct StringElt *) RemHead (&hashTab[i])) != NULL) {
			FREE_N (se->buf, char, strlen (se->buf) + 1);
			FREI (se);
		}
	}
}